import sys
import time
import matplotlib.pyplot as plt
import pylab
import numpy as np
import scipy
import scipy.optimize as opt
import scipy.sparse
import scipy.sparse.linalg
import scipy.linalg
import sklearn.datasets
The Hessian of the loss is $\frac{1}{N}\sum\limits_{i=1}^{N}\sigma_w(x_i)\,\sigma_w(-x_i)\,x_i x_i^T$, where $\sigma_w(x) = \sigma(\langle w, x \rangle)$.
If the function has several local minima, the result of course depends on the choice of the starting point. For this particular problem, note that the stopping criterion involves the gradient at the starting point: the larger the initial gradient, the worse the accuracy of the computed answer. Also, $w_0$ must not be too large in norm: if $\langle x, w \rangle$ is even moderately large in absolute value (which is more likely in high dimensions), the sigmoid $1/(1+\exp(-\langle x, w \rangle))$ will be very close to either 0 or 1.
Oracle functions:
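For reference, the quantities the oracle computes (read off directly from the code below, with $N$ samples and labels $l_i \in \{-1, 1\}$):
$$f(w) = -\frac{1}{N}\sum_{i=1}^{N}\log\sigma(l_i\langle x_i, w\rangle), \qquad \nabla f(w) = -\frac{1}{N}\sum_{i=1}^{N} l_i\bigl(1 - \sigma(l_i\langle x_i, w\rangle)\bigr)x_i,$$
$$\nabla^2 f(w) = \frac{1}{N}\sum_{i=1}^{N}\sigma(l_i\langle x_i, w\rangle)\bigl(1 - \sigma(l_i\langle x_i, w\rangle)\bigr)x_i x_i^T.$$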
def sigmoid(x):
return 1 / (1 + np.exp(-x))
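An aside (not used in the experiments below): np.exp(-x) overflows for large negative x, which matters exactly in the saturation regime described above; a safer sketch delegates to scipy.special.expit, which computes the same value:
from scipy.special import expit
def sigmoid_stable(x):
    # expit evaluates 1 / (1 + exp(-x)) without intermediate overflow
    return expit(x)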
def oracle(w, X, labels, outers=None, order=0, no_function=False):
Xw = X.dot(w)
sigmoids = [sigmoid(l * xw) for xw, l in zip(Xw, labels)]
f = 0
if not no_function:
f = -1 / X.shape[0] * np.sum([np.log(s) for s in sigmoids])
if order == 0:
return f
grad_coeffs = np.array([l * (1 - s) for s, l in zip(sigmoids, labels)])
X1 = X.multiply(grad_coeffs.reshape(-1, 1))
g = -1 / X.shape[0] * np.array(X1.sum(axis=0)).reshape(X.shape[1])
if order == 1:
return f, g, 0
hess_coeffs = np.array([s * (1 - s) for s in sigmoids])
    # outers: array of flattened outer products x x^T, which can be precomputed
if outers is None:
h = 1 / X.shape[0] * np.sum([np.outer(x, x) * hess_coeffs[i] for i, x in enumerate(X.todense())], axis=0)
else:
outers1 = outers.multiply(hess_coeffs.reshape(-1, 1))
h = 1 / X.shape[0] * np.array(outers1.sum(axis=0)).reshape((X.shape[1], X.shape[1]))
if order == 2:
return f, g, h
def function(w, X, labels):
return oracle(w, X, labels)
def gradient(w, X, labels):
return oracle(w, X, labels, order=1, no_function=True)[1]
def hessian(w, X, labels):
return oracle(w, X, labels, order=2, no_function=True)[2]
#numerical differentiation
def der(fun, point, epsilon=np.sqrt(sys.float_info.epsilon)):
return (fun(point + epsilon) - fun(point)) / epsilon
#computes the average relative error over all coordinate axes at a random point from [-R, R]^dim
def check_gradient(fun, grad, R, dim, args=(), diff_eps=np.sqrt(sys.float_info.epsilon)):
w = np.random.random(dim)
w = (2 * w - 1) * R
dw = np.eye(dim)
g = grad(w, *args)
norm = np.dot(g, g)
difs = [np.abs((np.dot(g, dw_i) - der(lambda t: fun(w + t * dw_i, *args), 0, diff_eps))) for dw_i in dw]
return np.average(difs) / norm
#computes the average relative error over all pairs of coordinate axes at a random point from [-R, R]^dim
def check_hessian(grad, hess, R, dim, args=(), diff_eps=np.sqrt(sys.float_info.epsilon)):
w = np.random.random(dim)
w = (2 * w - 1) * R
dw = np.eye(dim)
h = hess(w, *args)
norm = np.dot(h.flatten(), h.flatten())
difs = [np.abs(np.dot(dw1, np.dot(h, dw2)) - der(lambda t: np.dot(grad(w + t * dw1, *args), dw2), 0, diff_eps)) for dw1 in dw
for dw2 in dw]
return np.average(difs) / norm
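A quick self-test of both checkers on a quadratic with known derivatives (illustrative; the SPD matrix A below is arbitrary):
A = np.array([[2.0, 0.5], [0.5, 1.0]])
f_quad = lambda w: np.dot(w, A.dot(w))  # f(w) = w^T A w
g_quad = lambda w: 2 * A.dot(w)         # gradient: 2 A w (A is symmetric)
h_quad = lambda w: 2 * A                # constant Hessian: 2 A
print('gradient error =', check_gradient(f_quad, g_quad, 1, 2))
print('hessian error =', check_hessian(g_quad, h_quad, 1, 2))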
a1a:
a1a = sklearn.datasets.load_svmlight_file('../data/a1a.txt')
X = a1a[0]
dummy = scipy.sparse.csr_matrix([[1] for i in range(X.shape[0])])
X_a1a = scipy.sparse.hstack([X, dummy])
labels_a1a = a1a[1]
breast cancer (features scaled to [-1, 1]):
breast_cancer = sklearn.datasets.load_svmlight_file('../data/breast-cancer_scale.txt')
X = breast_cancer[0]
dummy = scipy.sparse.csr_matrix([[1] for i in range(X.shape[0])])
X_cancer = scipy.sparse.hstack([X, dummy])
labels_cancer = breast_cancer[1] - 3  # original labels 2/4 mapped to -1/+1
Randomly generated dataset:
def random_dataset(alpha, beta):
xs = np.random.normal(size=(1000, alpha.shape[0]))
labels = np.array([np.sign(np.dot(alpha, x) + beta) for x in xs])
return xs, labels
alpha = 2 * np.random.random(11) - 1
X, labels_rand = random_dataset(alpha[:-1], alpha[-1])
dummy = scipy.sparse.csr_matrix([[1] for i in range(X.shape[0])])
X_rand = scipy.sparse.hstack([X, dummy])
Let's check that the gradient and the Hessian are computed correctly:
print('gradient error =', check_gradient(function, gradient, 1, X_a1a.shape[1], args=[X_a1a, labels_a1a]))
print('hessian error =', check_hessian(gradient, hessian, 1, X_a1a.shape[1], args=[X_a1a, labels_a1a]))
#slow - high dimensionality
print('gradient error =', check_gradient(function, gradient, 1, X_cancer.shape[1], args=[X_cancer, labels_cancer]))
print('hessian error =', check_hessian(gradient, hessian, 1, X_cancer.shape[1], args=[X_cancer, labels_cancer]))
print('gradient error =', check_gradient(function, gradient, 1, X_rand.shape[1], args=[X_rand, labels_rand]))
print('hessian error =', check_hessian(gradient, hessian, 1, X_rand.shape[1], args=[X_rand, labels_rand]))
The errors are small, so the analytic gradient and Hessian match the numerical estimates.
#on a given interval: returns the minimum point and the number of function calls
def golden_search_bounded(fun, a0, b0, eps=100*sys.float_info.epsilon, args=()):
ratio = (1 + 5 ** 0.5) / 2
def step(a, b, c, fc, onumber):
if b - a < eps:
return a, fun(a, *args), onumber + 1
else:
d = a + b - c
            fd = fun(d, *args)
if c > d:
c, d = d, c
fc, fd = fd, fc
if fc < fd:
return step(a, d, c, fc, onumber + 1)
else:
return step(c, b, d, fd, onumber + 1)
c0 = a0 + (b0 - a0) / ratio
solution = step(a0, b0, c0, fun(c0, *args), 0)
return solution[0], solution[2]
#general case, using bracket to find an initial interval
def golden_search(fun, eps=100*sys.float_info.epsilon, args=()):
a, _, b, _, _, _, onumber = opt.bracket(fun, args=args)
if b < a:
a, b = b, a
gsb = golden_search_bounded(fun, a, b, eps=eps, args=args)
return gsb[0], gsb[1] + onumber
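A usage example (illustrative) on $(x-1)^2$:
x_min, calls = golden_search_bounded(lambda x: (x - 1) ** 2, -2, 3)
print('bounded: x =', x_min, ', calls =', calls)
x_min, calls = golden_search(lambda x: (x - 1) ** 2)
print('with bracket: x =', x_min, ', calls =', calls)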
A helper for plotting:
def graph(x, y, x_l=None, y_l=None, title=None):
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set(xlabel=x_l, ylabel=y_l, title=title)
ax.grid()
plt.show()
Let's generate polynomials of degree 2, 3 and 5 and plot them to decide on which intervals to start the search for a minimum:
c2 = [-0.95717287, -0.75630972, 0.72343828]
c3 = [0.97490814, 0.80688161, 0.05165432, -0.52669906]
c5 = [0.74540317, -0.50950527, 0.96825081, -0.0243777, -0.33768442, 0.13532899]
def pol2(x):
return c2[0] + c2[1] * x + c2[2] * x ** 2
def pol3(x):
return c3[0] + c3[1] * x + c3[2] * x ** 2 + c3[3] * x ** 3
def pol5(x):
return c5[0] + c5[1] * x + c5[2] * x ** 2 + c5[3] * x ** 3 + c5[4] * x ** 4 + c5[5] * x ** 5
xs = np.arange(-2, 2, 0.001)
print(str(c2[0]) + ' + ' + str(c2[1]) + 'x + ' + str(c2[2]) + 'x ^ 2')
graph(xs, [pol2(x) for x in xs])
print(str(c3[0]) + ' + ' + str(c3[1]) + 'x + ' + str(c3[2]) + 'x ^ 2 + ' + str(c3[3]) + 'x ^ 3')
graph(xs, [pol3(x) for x in xs])
print(str(c5[0]) + ' + ' + str(c5[1]) + 'x + ' + str(c5[2]) + 'x ^ 2 + ' + str(c5[3]) + 'x ^ 3 +' + str(
c5[4]) + 'x ^ 4 + ' + str(c5[5]) + 'x ^ 5')
graph(xs, [pol5(x) for x in xs])
Let's compare golden section search with Brent's method:
x, fev = golden_search_bounded(pol2, -0.5, 1.5)
opt1 = opt.minimize_scalar(pol2, bracket=[-0.5, 0, 1.5], method='brent')
x1, fev1 = opt1['x'], opt1['nfev']
print('x golden =', x, ', nfev =', fev)
print('x brent =', x1, ', nfev =', fev1)
print('pol2(x golden) - pol2(x brent) =', pol2(x) - pol2(x1))
x, fev = golden_search_bounded(pol3, -1.5, 0)
opt1 = opt.minimize_scalar(pol3, bracket=[-1.5, -1, 0], method='brent')
x1, fev1 = opt1['x'], opt1['nfev']
print('x golden =', x, ', nfev =', fev)
print('x brent =', x1, ', nfev =', fev1)
print('pol3(x golden) - pol3(x brent) =', pol3(x) - pol3(x1))
x, fev = golden_search_bounded(pol5, -0.5, 1.0)
opt1 = opt.minimize_scalar(pol5, bracket=[-0.5, 0, 1], method='brent')
x1, fev1 = opt1['x'], opt1['nfev']
print('x golden =', x, ', nfev =', fev)
print('x brent =', x1, ', nfev =', fev1)
print('pol5(x golden) - pol5(x brent) =', pol5(x) - pol5(x1))
Brent's method is better.
def armiho(fun, c=0.1, k=10, f0=None, df0=None):
x = 1
oracle = 0
    if f0 is None:
f0 = fun(0)
oracle += 1
if df0 is None:
df0 = der(fun, 0)
oracle += 2
fx = fun(x)
fkx = fun(k * x)
oracle += 2
while True:
if fx > f0 + c * df0 * x:
x /= k
fkx = fx
fx = fun(x)
oracle += 1
elif fkx < f0 + k * c * df0 * x:
x *= k
fx = fkx
fkx = fun(k*x)
oracle += 1
else:
break
return x, fx, oracle
def nester(fun, f0, d, L0=1):
L = L0
oracle = 0
fx = fun(1 / L)
oracle += 1
while fx > f0 - 1 / (2 * L) * np.dot(d, d):
L *= 2
fx = fun(1 / L)
oracle += 1
return L, fx, oracle
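Both step-size rules on a toy one-dimensional restriction $\varphi(\alpha) = f(x_0 + \alpha d)$ for $f(x) = x^2$, $x_0 = 3$, $d = -f'(x_0) = -6$ (illustrative):
x0, g0_ = 3.0, 6.0
phi = lambda alpha: (x0 - g0_ * alpha) ** 2
alpha, f_alpha, calls = armiho(phi, f0=phi(0), df0=-g0_ ** 2)  # df0 = <grad, d> = -36
print('armiho: alpha =', alpha, ', f =', f_alpha, ', oracle calls =', calls)
L_, f_alpha, calls = nester(phi, phi(0), np.array([g0_]))      # d = gradient, step = 1/L
print('nester: alpha =', 1 / L_, ', f =', f_alpha, ', oracle calls =', calls)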
To solve the system $Hx = y$, where $H$ is a positive definite matrix, we use the Cholesky decomposition:
def solve(G, d):
if np.linalg.matrix_rank(G) < G.shape[0]:
G = G + 0.0001 * np.eye(G.shape[0])
L = np.linalg.cholesky(G)
Ltx = scipy.linalg.solve_triangular(L, d, lower=True)
return scipy.linalg.solve_triangular(np.transpose(L), Ltx, lower=False)
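A sanity check of solve against np.linalg.solve on a random SPD matrix (illustrative):
B = np.random.random((5, 5))
G_test = B.dot(B.T) + 5 * np.eye(5)  # diagonally shifted, guaranteed positive definite
d_test = np.random.random(5)
print(np.allclose(solve(G_test, d_test), np.linalg.solve(G_test, d_test)))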
Gradient descent and Newton's method differ only in the choice of descent direction, so both are implemented in a single function.
Parameters: the oracle, the starting point, the method ('gradient descent' or 'newton'), the one-dimensional search strategy with its keyword arguments, extra oracle arguments, the tolerance epsilon for the squared-gradient ratio, and limits on the number of iterations and the running time.
Returns: the final point and per-iteration histories of iteration numbers, times, function values, squared-gradient ratios, and function/gradient/Hessian call counts.
The implementation in main.py returns only the last elements of these arrays.
def optimization_task(oracle, start, method='gradient descent', one_dim_search=None, args=(),
search_kwargs=dict([]), epsilon=0, max_iter=float('inf'), max_time=float('inf')):
iterations, fcalls, gcalls, hcalls, times, values, grad_ratios = [], [], [], [], [], [], []
start_time, k, fc, gc, hc = time.time(), 0, 0, 0, 0
x = start
if method == 'gradient descent':
ord = 1
if one_dim_search is None:
one_dim_search = 'brent'
elif method == 'newton':
ord = 2
if one_dim_search is None:
one_dim_search = 'unit step'
f0, g0, _ = oracle(x, *args, order=1)
fc += 1
gc += 1
fk = f0
if one_dim_search == 'nester':
L, L0 = 2, 0
if 'L0' in search_kwargs.keys():
L0 = 2 * search_kwargs['L0']
L = L0
flag = one_dim_search == 'armiho' or one_dim_search == 'wolf' or one_dim_search == 'nester'
while True:
f_, gk, hk = oracle(x, *args, order=ord, no_function=flag)
        # in the main.py implementation the no_function flag is always set
if not flag:
fk = f_
fc += 1
gc += 1
if ord == 2:
hc += 1
if method == 'gradient descent':
d = -gk
elif method == 'newton':
d = -solve(hk, gk)
ratio = np.dot(gk, gk) / np.dot(g0, g0)
iterations.append(k)
k += 1
fcalls.append(fc)
gcalls.append(gc)
hcalls.append(hc)
times.append(time.time() - start_time)
values.append(fk)
grad_ratios.append(ratio)
if ratio <= epsilon or k > max_iter or times[-1] > max_time:
break
fun = lambda alpha: oracle(x + d * alpha, *args)
if one_dim_search == 'unit step':
alpha = 1
elif one_dim_search == 'golden':
alpha, oracle_counter = golden_search(fun, **search_kwargs)
fc += oracle_counter
elif one_dim_search == 'brent':
solution = opt.minimize_scalar(fun)
alpha = solution['x']
fc += solution['nfev']
elif one_dim_search == 'armiho':
alpha, fk, oracle_counter = armiho(fun, **search_kwargs, f0=fk, df0=np.dot(gk, d))
fc += oracle_counter
elif one_dim_search == 'wolf':
f_for_wolf = lambda z: oracle(z, *args)
g_for_wolf = lambda z: oracle(z, *args, order=1, no_function=True)[1]
solution = opt.line_search(f_for_wolf, g_for_wolf, x, d, **search_kwargs, gfk=gk, old_fval=fk)
alpha = solution[0]
            if alpha is None:
alpha = 1
fc += solution[1]
gc += solution[2]
fk = solution[3]
            if fk is None:
fk = fun(1)
elif one_dim_search == 'nester':
L, fk, oracle_counter = nester(fun, fk, gk, L0=max(L / 2, L0))
alpha = 1 / L
fc += oracle_counter
else:
alpha = one_dim_search(fun, *args)
x = x + d * alpha
return x, iterations, times, values, grad_ratios, fcalls, gcalls, hcalls
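Besides the named strategies, one_dim_search accepts any callable of the form one_dim_search(fun, *args) returning a step size. A constant-step sketch (the 0.5 value is arbitrary):
const_step = lambda fun, *args: 0.5
w_c, *_ = optimization_task(oracle, np.zeros(X_rand.shape[1]), method='gradient descent',
                            args=[X_rand, labels_rand], one_dim_search=const_step, max_iter=50)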
def graph_several(xs, ys, labels, end=None, beg=0, x_l=None, y_l=None, title=None):
pylab.rcParams['figure.figsize'] = 15, 7
pylab.subplot(1, 1, 1)
if end is None:
end = max([max(x) for x in xs])
ends = [np.argmin([np.abs(p - end) for p in x]) for x in xs]
begs = [np.argmin([np.abs(p - beg) for p in x]) for x in xs]
xs1 = [xs[i][begs[i]:ends[i]+1] for i in range(len(xs))]
ys1 = [ys[i][begs[i]:ends[i]+1] for i in range(len(xs))]
def logs(y):
return [np.log(v) for v in y]
for i in range(len(xs)):
pylab.plot(xs1[i], logs(ys1[i]), label=labels[i])
pylab.title(title)
pylab.xlabel(x_l)
pylab.ylabel(y_l)
pylab.grid()
pylab.legend()
pylab.show()
def graph_several2(xs, ys, labels, end1=None, beg1=0, end2=None, beg2=0, x_l=None, y_l=None, title=None):
pylab.rcParams['figure.figsize'] = 15, 7
pylab.subplot(1, 2, 1)
if end1 is None:
end1 = max([max(x) for x in xs])
ends = [np.argmin([np.abs(p - end1) for p in x]) for x in xs]
begs = [np.argmin([np.abs(p - beg1) for p in x]) for x in xs]
xs1 = [xs[i][begs[i]:ends[i]+1] for i in range(len(xs))]
ys1 = [ys[i][begs[i]:ends[i]+1] for i in range(len(xs))]
def logs(y):
return [np.log(v) for v in y]
for i in range(len(xs)):
pylab.plot(xs1[i], logs(ys1[i]), label=labels[i])
pylab.title(title)
pylab.xlabel(x_l)
pylab.ylabel(y_l)
pylab.grid()
pylab.legend()
pylab.subplot(1, 2, 2)
if end2 is None:
end2 = max([max(x) for x in xs])
ends = [np.argmin([np.abs(p - end2) for p in x]) for x in xs]
begs = [np.argmin([np.abs(p - beg2) for p in x]) for x in xs]
xs1 = [xs[i][begs[i]:ends[i]+1] for i in range(len(xs))]
ys1 = [ys[i][begs[i]:ends[i]+1] for i in range(len(xs))]
def logs(y):
return [np.log(v) for v in y]
for i in range(len(xs)):
pylab.plot(xs1[i], logs(ys1[i]), label=labels[i])
pylab.title(title)
pylab.xlabel(x_l)
pylab.ylabel(y_l)
pylab.grid()
pylab.legend()
pylab.show()
w0_a1a = (2 * np.random.random(X_a1a.shape[1]) - 1) / 2
#starting point
w_true_a1a = opt.minimize(function, w0_a1a, args=(X_a1a, labels_a1a), jac = gradient)['x']
f_min_a1a = function(w_true_a1a, X_a1a, labels_a1a)
print('a1a function minimum =', f_min_a1a)
w0_cancer = (2 * np.random.random(X_cancer.shape[1]) - 1) / 2
#starting point
w_true_cancer = opt.minimize(function, w0_cancer, args=(X_cancer, labels_cancer), jac = gradient)['x']
f_min_cancer = function(w_true_cancer, X_cancer, labels_cancer)
print('cancer function minimum =', f_min_cancer)
w0_rand = (2 * np.random.random(X_rand.shape[1]) - 1) / 2
#starting point
f_min_rand = 0
print('rand function minimum =', f_min_rand)
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.7)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.5)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.3)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.1)]))
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.7', '0.5', '0.3', '0.1'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different c vs. time (a1a)')
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([i1, i2, i3, i4, i5], vs,
labels=['0.9', '0.7', '0.5', '0.3', '0.1'],
              x_l='iteration', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different c vs. iteration number (a1a)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.7)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.5)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.3)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('c', 0.1)]))
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.7', '0.5', '0.3', '0.1'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different c vs. time (cancer)')
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([i1, i2, i3, i4, i5], vs,
labels=['0.9', '0.7', '0.5', '0.3', '0.1'],
              x_l='iteration', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different c vs. iteration number (cancer)')
For $c$ close to 1 the function cannot decrease much in a single iteration. From now on we use $c = 0.1$: for smaller values the situation barely changes.
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 50)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 30)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 20)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 10)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 5)]))
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['50', '30', '20', '10', '5'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different k vs. time (a1a)')
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([i1, i2, i3, i4, i5], vs,
labels=['50', '30', '20', '10', '5'],
              x_l='iteration', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different k vs. iteration number (a1a)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 50)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 30)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 20)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 10)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=10, search_kwargs=dict([('k', 5)]))
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['50', '30', '20', '10', '5'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different k vs. time (cancer)')
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([i1, i2, i3, i4, i5], vs,
labels=['50', '30', '20', '10', '5'], end=800,
              x_l='iteration', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different k vs. iteration number (cancer)')
We will use $k=10$.
The dependence on $c_1$ (the coefficient in the Armijo condition) is similar to the one observed for the Armijo method, so it is not shown here.
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.5)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.3)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.1)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.001)]))
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several2([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.5', '0.3', '0.1', '0.001'], beg2=2,
               x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different $c_2$ vs. time (a1a)')
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several2([i1, i2, i3, i4, i5], vs,
labels=['0.9', '0.5', '0.3', '0.1', '0.001'], end1=100, beg2=20, end2=100,
               x_l='iteration', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different $c_2$ vs. iteration number (a1a)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.5)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.3)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.1)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.001)]))
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several2([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.5', '0.3', '0.1', '0.001'], beg2=4,
               x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different $c_2$ vs. time (cancer)')
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([i1, i2, i3, i4, i5], vs,
labels=['0.9', '0.5', '0.3', '0.1', '0.001'], end=100,
              x_l='iteration', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different $c_2$ vs. iteration number (cancer)')
The difference is not large; we will use $c_2 = 0.9$, the default in line_search.
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.2)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.15)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.1)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='wolf', max_time=10, search_kwargs=dict([('c2', 0.001)]))
The Wolfe method probably fails to converge because there is no minimum at all: since the dataset was obtained by separating points with a hyperplane, the optimal $w$ would be a normal vector to this hyperplane with the largest possible norm $|w|$ (i.e., the infimum of the function is attained as $|w|\rightarrow \infty$).
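This is easy to illustrate (a sketch): along the ray $t\alpha$, where $\alpha$ is the generating normal with the bias as its last component (matching the dummy column of X_rand), the loss decreases towards 0:
for t in [1, 10, 100]:
    print('t =', t, ', f =', function(t * alpha, X_rand, labels_rand))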
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='brent', max_time=30)
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='golden', max_time=30, search_kwargs=dict([('eps', 0.00001)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='armiho', max_time=30)
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='wolf', max_time=30)
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='gradient descent', args=[X_a1a, labels_a1a],
one_dim_search='nester', max_time=30)
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several2([t1, t2, t3, t4, t5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=2,
x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([t1, t2, t3, t4, t5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=2,
x_l='time', y_l='$\log(|d f(w_k)|^2/|d f(w_0)|^2)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=1000, end2=100,
x_l='function call number', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=1000, end2=100,
x_l='function call number', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
graph_several2([i1, i2, i3, i4, i5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=100, end2=15,
x_l='iterations', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([i1, i2, i3, i4, i5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=100, end2=15,
x_l='iterations', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='brent', max_time=30)
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='golden', max_time=30, search_kwargs=dict([('eps', 0.00001)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='armiho', max_time=30)
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='wolf', max_time=30)
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='gradient descent', args=[X_cancer, labels_cancer],
one_dim_search='nester', max_time=30)
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several2([t1, t2, t3, t4, t5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=2,
x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([t1, t2, t3, t4, t5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=2,
x_l='time', y_l='$\log(|d f(w_k)|^2/|d f(w_0)|^2)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=200,
x_l='function call number', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=200,
x_l='function call number', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
graph_several2([i1, i2, i3, i4, i5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=200, end2=20,
x_l='iterations', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([i1, i2, i3, i4, i5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=200, end2=20,
x_l='iterations', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='brent', max_time=30)
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='golden', max_time=30, search_kwargs=dict([('eps', 0.00001)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='armiho', max_time=30)
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='wolf', max_time=30)
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_rand, method='gradient descent', args=[X_rand, labels_rand],
one_dim_search='nester', max_time=30)
vs = np.array([v1-f_min_rand, v2-f_min_rand, v3-f_min_rand, v4-f_min_rand, v5-f_min_rand])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'],
x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([t1, t2, t3, t4, t5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=2,
x_l='time', y_l='$\log(|d f(w_k)|^2/|d f(w_0)|^2)$')
graph_several([fc1, fc2, fc3, fc4, fc5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'],
x_l='function call number', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end2=100,
x_l='function call number', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
graph_several([i1, i2, i3, i4, i5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'],
x_l='iterations', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([i1, i2, i3, i4, i5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'nester'], end1=100, end2=20,
x_l='iterations', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
Precomputed outer products (outers) for the datasets:
outers_a1a = scipy.sparse.csr_matrix([np.outer(x, x).flatten() for x in X_a1a.todense()])
outers_cancer = scipy.sparse.csr_matrix([np.outer(x, x).flatten() for x in X_cancer.todense()])
outers_rand = scipy.sparse.csr_matrix([np.outer(x, x).flatten() for x in X_rand.todense()])
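A sanity check (illustrative) that the precomputed-outers path yields the same Hessian as the direct one:
w_test = np.zeros(X_rand.shape[1])
h_direct = oracle(w_test, X_rand, labels_rand, order=2)[2]
h_precomp = oracle(w_test, X_rand, labels_rand, outers=outers_rand, order=2)[2]
print(np.allclose(h_direct, h_precomp))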
This cost pays off:
_, _, t1, _, _, _, _, _ = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a], epsilon=1e-9)
_, _, t2, _, _, _, _, _ = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a], epsilon=1e-9)
print('time without outers =', t1[-1])
print('time with outers =', t2[-1])
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('c', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('c', 0.7)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('c', 0.5)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('c', 0.3)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('c', 0.1)]))
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.7', '0.5', '0.3', '0.1'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different c vs. time (a1a)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.2, search_kwargs=dict([('c', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.2, search_kwargs=dict([('c', 0.7)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.2, search_kwargs=dict([('c', 0.5)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.2, search_kwargs=dict([('c', 0.3)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.2, search_kwargs=dict([('c', 0.1)]))
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.7', '0.5', '0.3', '0.1'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different c vs. time (cancer)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('k', 50)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('k', 30)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('k', 20)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('k', 10)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=0.75, search_kwargs=dict([('k', 5)]))
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['50', '30', '20', '10', '5'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different k vs. time (a1a)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.15, search_kwargs=dict([('k', 50)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.15, search_kwargs=dict([('k', 30)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.15, search_kwargs=dict([('k', 20)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.15, search_kwargs=dict([('k', 10)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=0.15, search_kwargs=dict([('k', 5)]))
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['50', '30', '20', '10', '5'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different k vs. time (cancer)')
The situation is not much different from the gradient descent case.
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='wolf', max_time=1, search_kwargs=dict([('c2', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='wolf', max_time=1, search_kwargs=dict([('c2', 0.5)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='wolf', max_time=1, search_kwargs=dict([('c2', 0.3)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='wolf', max_time=1, search_kwargs=dict([('c2', 0.1)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='wolf', max_time=1, search_kwargs=dict([('c2', 0.001)]))
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.5', '0.3', '0.1', '0.001'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different $c_2$ vs. time (a1a)')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='wolf', max_time=0.1, search_kwargs=dict([('c2', 0.9)]))
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='wolf', max_time=0.1, search_kwargs=dict([('c2', 0.5)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='wolf', max_time=0.1, search_kwargs=dict([('c2', 0.3)]))
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='wolf', max_time=0.1, search_kwargs=dict([('c2', 0.1)]))
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='wolf', max_time=0.1, search_kwargs=dict([('c2', 0.001)]))
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['0.9', '0.5', '0.3', '0.1', '0.001'],
              x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$', title='results for different $c_2$ vs. time (cancer)')
The default $c_2 = 0.9$ is a good choice.
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='brent', max_time=1)
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='golden', max_time=1, search_kwargs=dict([('eps', 0.00001)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='armiho', max_time=1)
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='wolf', max_time=1)
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_a1a, method='newton', args=[X_a1a, labels_a1a, outers_a1a],
one_dim_search='unit step', max_time=1)
vs = np.array([v1-f_min_a1a, v2-f_min_a1a, v3-f_min_a1a, v4-f_min_a1a, v5-f_min_a1a])
graph_several2([t1, t2, t3, t4, t5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=0.1,
x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([t1, t2, t3, t4, t5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=0.2,
x_l='time', y_l='$\log(|d f(w_k)|^2/|d f(w_0)|^2)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=5,
x_l='function call number', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=20,
x_l='function call number', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
graph_several2([i1, i2, i3, i4, i5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=3,
x_l='iterations', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([i1, i2, i3, i4, i5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=7,
x_l='iterations', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='brent', max_time=1)
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='golden', max_time=1, search_kwargs=dict([('eps', 0.00001)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='armiho', max_time=1)
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='wolf', max_time=1)
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_cancer, method='newton', args=[X_cancer, labels_cancer, outers_cancer],
one_dim_search='unit step', max_time=1)
vs = np.array([v1-f_min_cancer, v2-f_min_cancer, v3-f_min_cancer, v4-f_min_cancer, v5-f_min_cancer])
graph_several2([t1, t2, t3, t4, t5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=0.1,
x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([t1, t2, t3, t4, t5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=0.1,
x_l='time', y_l='$\log(|d f(w_k)|^2/|d f(w_0)|^2)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=15,
x_l='function call number', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=20,
x_l='function call number', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
graph_several([i1, i2, i3, i4, i5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'],
x_l='iterations', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([i1, i2, i3, i4, i5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end1=60, end2=10,
x_l='iterations', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
a1, i1, t1, v1, r1, fc1, gc1, hc1 = optimization_task(oracle, w0_rand, method='newton', args=[X_rand, labels_rand, outers_rand],
one_dim_search='brent', max_time=1)
a2, i2, t2, v2, r2, fc2, gc2, hc2 = optimization_task(oracle, w0_rand, method='newton', args=[X_rand, labels_rand, outers_rand],
one_dim_search='golden', max_time=1, search_kwargs=dict([('eps', 0.00001)]))
a3, i3, t3, v3, r3, fc3, gc3, hc3 = optimization_task(oracle, w0_rand, method='newton', args=[X_rand, labels_rand, outers_rand],
one_dim_search='armiho', max_time=1)
a4, i4, t4, v4, r4, fc4, gc4, hc4 = optimization_task(oracle, w0_rand, method='newton', args=[X_rand, labels_rand, outers_rand],
one_dim_search='wolf', max_time=1)
a5, i5, t5, v5, r5, fc5, gc5, hc5 = optimization_task(oracle, w0_rand, method='newton', args=[X_rand, labels_rand, outers_rand],
one_dim_search='unit step', max_time=1)
vs = np.array([v1-f_min_rand, v2-f_min_rand, v3-f_min_rand, v4-f_min_rand, v5-f_min_rand])
graph_several([t1, t2, t3, t4, t5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'],
x_l='time', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([t1, t2, t3, t4, t5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=0.2,
x_l='time', y_l='$\log(|d f(w_k)|^2/|d f(w_0)|^2)$')
graph_several([fc1, fc2, fc3, fc4, fc5], vs,
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'],
x_l='function call number', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several2([fc1, fc2, fc3, fc4, fc5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end2=30,
x_l='function call number', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
graph_several([i1, i2, i3, i4, i5], vs,
              labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'],
x_l='iterations', y_l='$\log(|f(w_k)-f_{min}|)$')
graph_several([i1, i2, i3, i4, i5], [r1, r2, r3, r4, r5],
labels=['brent', 'golden', 'armiho', 'wolf', 'unit step'], end=7,
x_l='iterations', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')
Choosing the unit step is the optimal solution for Newton's method.
The a1a dataset contains all-zero columns, so the Hessian in Newton's method always comes out singular. If we remove all zero columns beforehand, then when solving the system $Hx=g$ in Newton's method we may skip adding $\epsilon E$. Nevertheless, np.linalg.cholesky still raises an error because the matrix $H$ is singular, but scipy.sparse.linalg.cg solves the system successfully.
X = scipy.sparse.csr_matrix(X_a1a)
labels = labels_a1a
w0 = scipy.sparse.csr_matrix(w0_a1a)
ind = [X[:, i].sum() for i in range(X.shape[1])]
offset = 0
for i in range(X.shape[1]):
if ind[i] < 1:
X1 = X[:, :(i - offset)]
X2 = X[:, (i - offset + 1):]
X = scipy.sparse.csr_matrix(scipy.sparse.hstack([X1, X2]))
w0 = scipy.sparse.csr_matrix(scipy.sparse.hstack([w0[:, :(i - offset)], w0[:, (i - offset + 1):]]))
offset += 1
outers = scipy.sparse.csr_matrix([np.outer(x, x).flatten() for x in X.todense()])
w0 = np.array(w0.todense()).reshape(X.shape[1])
_, _, t1, _, r1, _, _, _ = optimization_task(oracle, w0, method='newton', args=[X, labels, outers],
one_dim_search='unit step', max_time=3)
def solve(G, d):
    # redefining solve globally switches optimization_task to the conjugate gradient solver
    return scipy.sparse.linalg.cg(G, d)[0]
_, _, t2, _, r2, _, _, _ = optimization_task(oracle, w0, method='newton', args=[X, labels, outers],
one_dim_search='unit step', max_time=3)
graph_several([t1, t2], [r1, r2],
labels=['cholesky', 'cg'],
x_l='time', y_l='$\log(| df(w_k)|^2/| df(w_0)|^2)$')